---
title: "Replication code for CJPS article datasets"
author: "Isadora Borges Monroy"
date: "12/08/2025"
output: html_document
---
Recoded variables written to be as copy-paste friendly as possible with StatsCan controlled codebook. This code uses CES2021 as a template but can be substituted for the General Social Surveys 34 (version 3) and 35 (version 2) or the Impacts of COVID-19 on Canadians’ Experiences of Discrimination (version 1).

---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
setwd("YourWorkingDirectoryHere")
library(tidyverse)
library(magrittr)
library(janitor)
library(stargazer)
library(ggstatsplot)
library(scales)
library(ggeffects)
library(ggiraphExtra)
library(tidyselect)

CES2021 <- readstata13::read.dta13("CES June 2021/2021 data/2021 Canadian Election Study v1.0.dta")
load("CES June 2021/2021 data/2021 Canadian Election Study v1.0.RData")

CES2021 <- df
CES2021 %<>% rename(ResponseId = cps21_ResponseId, Weights= cps21_weight_general_all)
#dfcomplete <- read_rds("CES June 2021/dfCES2021_complete.R")

# If n=7380 then that's the old dataset
# dfimputed <- read_rds("CES June 2021/dfCES2021imputedAllin.R")
# dfunimputed <- read_rds("CES June 2021/dfCES2021.R")
# dfimputed <- read_rds("CES June 2021/dfCES2021imputed.R")

theme <- theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
       aspect.ratio = 1,
       legend.position="top", #legend.position="right",
       panel.background = element_rect(fill = "white"),
       #plot.margin = margin (0.1,0.1,0.1,0.1, "cm"),
       strip.background = element_blank(),
       panel.grid.minor=element_blank(),
       panel.grid.major= element_line(color = "seashell1"),
       plot.background=element_blank(),
       title = element_text(size = 20),
#    axis.text.x = element_blank(), #remove tickmarks
    axis.title.x = element_text(size = 20, colour = "black"),
    axis.title.y = element_text(size = 20, colour = "black"),
    legend.title = element_text(size = 18, colour = "black"),
    legend.text = element_text(size = 16, colour = "black"),
axis.text = element_text(size = 16))

Main_parties_color <- c("#D71920",#Liberal
                        "#33B2CC",#Bloc
                        "#1A4782",#Conservative
                        "#3D9B35",#Green 
                        "#F37021", #NDP
                        #"#2A317C", #People's
                        "gray")
```

# Part 1 Variable wrangling
##DVs 

CES confidence measures are only for postelection 
```{r DV confidence single-item}

CES2021$pes21_conf_inst2_6 %>% tabyl()
CES2021$`Confidence police` <- CES2021$pes21_conf_inst2_6
CES2021$`Confidence police` %>% tabyl 
CES2021 %<>% drop_na(`Confidence police`) 
CES2021$`Confidence police rescaled` <-  
  recode(CES2021$pes21_conf_inst2_6 %>% as.numeric, #Changed variable from 2019, check all others
                "1" = 1,
                "2" = 2/3,
                "3" = 1/3,
                "4" = 0,
         .missing = NA_real_, .default = NA_real_)

CES2021$`Confidence police rescaled` %>% tabyl()
CES2021$`Confidence police rescaled`%>% summary 
#n = 7401 as 190 NAs are excluded from analysis
```

```{r Protesting}
CES2021%<>% rename("Protesting" = pes21_partic1_2)
CES2021$Protesting %>% tabyl()
CES2021$Protesting %<>% recode_factor(.,
                "Never" = "None",
                "Don't know/ Prefer not to answer" = NA_character_,
                .default = "Yes")
CES2021$Protesting %>% tabyl()

```

```{r DV confidence standardized for SM}
CES2021$`Confidence politicians` <-  
  recode(CES2021$pes21_populism_4 %>% as.numeric(),
                "1" = 0,
                "2" = .25,
                "3" = .5,
                "4" = .75,
                "5" = 1,
         .missing = NA_real_, .default = NA_real_)

CES2021$`Confidence feds` <-  
  recode(CES2021$pes21_conf_inst1_1 %>% as.numeric,
                "1" = 1,
                "2" = 2/3,
                "3" = 1/3,
                "4" = 0,
         .missing = NA_real_, .default = NA_real_)

CES2021$`Confidence province` <-  
  recode(CES2021$pes21_conf_inst1_2 %>% as.numeric(),
                "1" = 1,
                "2" = 2/3,
                "3" = 1/3,
                "4" = 0,
         .missing = NA_real_, .default = NA_real_)

# Police
CES2021$pes21_conf_inst2_6 %>% tabyl()

CES2021 %>% select(., starts_with("Confidence "))%$%str(.) #make sure it has a space to drop two sorting variable

# Response rate for CES 2019 Confidence  

CES2021 %>%  select(starts_with("Confidence"))  %>%  map(~tabyl(.x))

CES2021 %<>% mutate(`standardized police confidence` = scales::rescale(`Confidence police rescaled` - (select(., `Confidence feds`, `Confidence police rescaled`, `Confidence politicians`, `Confidence province`)) %>% rowMeans(na.rm = TRUE)))

CES2021$`Confidence police rescaled`%>% summary
CES2021$`standardized police confidence` %>% summary #n = 7401 as 190 NAs are excluded from analysis
CES2021$`standardized police confidence` %>% tabyl()

```

##Main IV
```{r Race recode}
CES2021 %<>% mutate(`Ethnicity`= case_when(
 cps21_vismin_9 == "1" ~ "White", 
 cps21_vismin_3 == "1" ~ "Black",
 cps21_vismin_4 =="1" ~ "Indigenous", 
 TRUE ~ "Other Person of Colour"))

CES2021$Ethnicity%<>% factor() %>% relevel(., ref = "White") 
CES2021%$%summary(Ethnicity)
#CES2021$Ethnicity %>% recode("Other Person of Colour" = "PoC, other")

```

```{r Poverty}
CES2021%$%head(cps21_income_number)
CES2021 %<>% mutate(Income = case_when(
   cps21_income_number %in% 1:20000 | cps21_income_cat  == "$1 to $30,000" ~ "Below poverty line",
   cps21_income_number > 20000 | cps21_income_cat  != "$1 to $30,000" & cps21_income_cat  != "Don't know/ Prefer not to answer"  ~ "Above poverty",
 TRUE ~ NA_character_))
CES2021$Income%<>% fct_relevel("Above poverty", "Below poverty line")
CES2021$Income %>% tabyl()
```


```{r Immigration}
CES2021$cps21_bornin_canada %>% tabyl()
CES2021 %<>%rename("Place of birth" = cps21_bornin_canada) 
CES2021$`Place of birth` %<>% recode("Yes" = "Canada", 
                                     "No" = "Other", .default = NA_character_)

CES2021$`Place of birth` %>% tabyl()
```

```{r Gender}
CES2021 %<>% rename(Gender = cps21_genderid)
CES2021 %>% tabyl(Gender)
CES2021$Gender%<>% recode("A man" = "Male", .default = "Women, trans and non-binary") %>% fct_relevel(., "Male")
```

##Controls
```{r Age and Cohorts}
CES2021 %<>% rename(Age = cps21_age)
CES2021 %<>% mutate(Cohort = case_when(
  Age %in% c(18:25) ~ "GenZ and Millennial", #GenZ
  Age %in% c(26:41) ~ "GenZ and Millennial",  #Millenial
  Age %in% c(42:57) ~ "Gen X", 
  Age %in% c(58:76) ~ "Boomer", 
  Age > 76 ~ "Silent", TRUE ~ NA_character_))
CES2021$Cohort%<>% fct_relevel("GenZ and Millennial", "Gen X", "Boomer", "Silent")
CES2021%$%summary(Cohort)
CES2021%$%summary(Age)
```

```{r Education}
CES2021 %>% tabyl(cps21_education)
CES2021 %<>% mutate(Education = case_when(
      as.numeric(cps21_education) %in% c(1:4) ~ "Low", #Some secondary
      as.numeric(cps21_education)  %in%  c(5:8) ~ "Medium",
      as.numeric(cps21_education)  %in%  c(9:11) ~ "High", TRUE ~ NA_character_) %>%  fct_relevel(., "Low", "Medium", "High") %>%  fct_relevel("Low", "Medium", "High"))
CES2021 %>% tabyl(cps21_education, Education)
```

```{r Province}
CES2021 %>% tabyl(province) ##almost 3K English answers for provinces left blank but coded as whitespace instead of NA,
CES2021 %>% tabyl(Province) #French version is complete, make sure to call French for recoding (capitalized variable)
CES2021 %<>% rename(Province = province_fr) #"du Québec", "de la Colombie Britannique", "de l'Ontario", "de l'Alberta", "de Terre-Neuve-et-Labrador", "de la Saskatchewan", "du Manitoba", "du Nouveau-Brunswick", "du Yukon", "de la Nouvelle-Écosse", "des Territoires du Nord-Ouest", "de l'Ile-du-Prince-Édouard", "du Nunavut"
CES2021$Regions <- CES2021$Province
CES2021$Regions %<>% fct_relevel(.,"de l'Ontario")
CES2021$Regions %<>% fct_collapse(., 
                                  "Atlantic" = c("de Terre-Neuve-et-Labrador", "de l'Ile-du-Prince-Édouard", "de la Nouvelle-Écosse","du Nouveau-Brunswick"), 
                                  "Quebec" = "du Québec", 
                                  "British Columbia" = "de la Colombie Britannique", 
                                  "Ontario" = "de l'Ontario", 
                                  "West" = c("du Manitoba", "de la Saskatchewan", "de l'Alberta"),
                                  "North" = c("du Yukon", "des Territoires du Nord-Ouest", "du Nunavut"))

CES2021 %>% tabyl(Province, Regions)

```

##Political controls
```{r Partisanship and Ideology}
CES2021 %<>% 
  mutate(`Party ID` = case_when(
      cps21_fed_id == "Liberal" ~ "Liberal",
      cps21_fed_id == "Conservative" ~ "Conservative",
      cps21_fed_id == "ndp" ~ "NDP",
      cps21_fed_id == "Bloc Québécois" ~ "Bloc",
      cps21_fed_id == "Green" ~ "Green", 
      TRUE ~ "Other/None") %>% fct_relevel("Liberal"))
CES2021 %>% tabyl(cps21_fed_id, `Party ID`)

CES2021 %<>% mutate(Ideology = case_match(cps21_lr_scale_bef_1, -99 ~ -NA_real_, .default = cps21_lr_scale_bef_1))
CES2021$Ideology %>% tabyl()

```

#Prepare abridged wrangled dataset
```{r Selection}
df <- CES2021
df %<>% select(., Ethnicity,  Income, `Years in Canada`, `Place of birth`, Gender, Age, Cohort, Education, Province, Regions, `Confidence police`, `Confidence police rescaled`,  `Confidence politicians`, `Confidence feds`, `Confidence province`, Protesting, `standardized police confidence`, `Party ID`,  Ideology,  Weights, ResponseId)
glimpse(df)

#Create dataset
#write_rds(df, "dfCES2021.R") #abridged to our wrangled variables of interest
#write_rds(CES2021, "CES2021unimputedAll.R") #unimputed and all variables, with data wrangled 


```

#### Imputation 
```{r Imputation process}
df %>%select(-ResponseId, -Weights) %>% glimpse()
#Count NAs
df %>% map(~((sum(is.na(.x))) /length(.x)*100)) %>% as_tibble() %>% transpose(.) %>% simplify_all() %>% deframe()

library(mice) #Does not play well with `Variable names` so rename 
df%<>% rename(PoB= `Place of birth`, stdPoliceConfidence=`standardized police confidence`,  PID=`Party ID`, Confpoliticians=`Confidence politicians`, Conffeds=`Confidence feds`, Confprov=`Confidence province`, Confpolrs=`Confidence police rescaled`, Confpol=`Confidence police`)
df %>% colnames()

colnames(df)
imp  <-  mice(df, maxit=0) 
meth <-  imp$method
predM <-  imp$predictorMatrix
# Here I am specifying which variables should NOT be used for PREDICTION (outcomes, survey weights):
# variables that should NOT be IMPUTED (outcomes, main IV, survey weights):
predM[, c("stdPoliceConfidence",  "PID", "VoteFor", 
          "Gender", "Regions", "ResponseId", "Confpol", "Confpolrs")]=0  
# Diff methods must be used to impute continuous, binary, and ordinal vars. Here I am specifying which methods for the diff variables:
#None for the ones that shouldn't be imputed
meth[c("stdPoliceConfidence",  "PID", "Gender", "Regions", "ResponseId", "Confpol", "Confpolrs")]=""

#Numeric variables
meth[c("Age", "Ideology", "Weights")]="pmm" # numeric
meth[c("Income", "PoB")]="logreg" # dummy
meth[c("Ethnicity")]="polyreg" #Unordered categorical factor
meth[c("Cohort",  "Education")] = "polr" #Ordered categorical factor

#### Run the multiple (m=5) imputation --------
set.seed(1986)

#Select and run the next three lines together to see how quickly the impute
lubridate::now() 
imputed  <-  mice(df, method=meth, predictorMatrix=predM, m=5) 
lubridate::now() 

# Create dataframe after imputation
dfimputed <- mice::complete(imputed) 
# Check for missingness
dfimputed %>% map_df(~(sum(is.na(.x))) #/length(x) #as proportion of length
                   ) %>% as_tibble() %>% transpose(.) %>% simplify_all()

glimpse(df)

str(dfimputed)
dfimputed$Income %>% tabyl()

# Rename back to tidyverse convention for graphing
dfimputed %<>%rename( #For ggeffects and imputation
  `Place of birth`=POB,`standardized police confidence`=stdPoliceConfidence, `Party ID`=PID, `Confidence politicians`=Confpoliticians, `Confidence feds`=Conffeds, `Confidence province`=Confprov, `Confidence police rescaled`=Confpolrs, `Confidence police`=Confpol)

#create binary ethnicity variables for t-tests
dfimputed %<>% mutate(BW = case_when(Ethnicity == "White" ~ "White", 
                                     Ethnicity == "Black" ~ "Black", TRUE~NA_character_) %>% fct_relevel("White", after = 0L), 
                      IW = case_when(Ethnicity == "White" ~ "White", 
                                     Ethnicity == "Indigenous" ~ "Indigenous", 
                                     TRUE~NA_character_) %>% fct_relevel("White", after = 0L), 
                      oPoCW = case_when(Ethnicity == "White" ~ "White", 
                                     Ethnicity == "Other Person of Colour" ~ "Other Person of Colour", TRUE~NA_character_) %>% fct_relevel("White", after = 0L)) 

dfimputed %>% select(BW, IW, oPoCW)  %>%  map(~tabyl(.x)) %>% adorn_pct_formatting()

write_rds(dfimputed, "dfCES2021imputed.R") #Make a copy

dfimputed %>% tabyl(Ethnicity) %>% adorn_pct_formatting(2)
```

# Weighing data
```{r Weights}
# Computing survey weights (CES has: province, gender, age group, and education; ours add ethnicigy as a categorical (4) variable)

library(survey)

# create the unweighted survey object:
dfimputed.unweighted <- survey::svydesign(ids=~1, data=dfimputed) # ids=~1 means we don't have any weights. R gives us a warning saying we're not applying any weights

#Updated with 2021 data, population total in 2021 = 36991981 in 2021, n=35151728 in 2016
# Create the population data frames using census data:
region.dist <- data.frame(Regions = c("Ontario", "West", "Atlantic", "British Columbia", "North", "Quebec"), Freq = nrow(dfimputed)*c(0.384514201, 0.182128473, 0.0651, 0.135188191, 0.003194206, 0.229829081))

gender.dist <- data.frame(Gender = c("Male", "Women, trans and non-binary"),
                          Freq = nrow(dfimputed) * c(50.7, 49.3)) 

cohort.dist <- data.frame(Cohort = c("GenZ and Millennial", "Gen X", "Boomer", "Silent"),
                       Freq = nrow(dfimputed) * c(0.28262545, 
                                                  0.204643006, 
                                                  0.237327929, 
                                                  0.080095064))

edu.dist <- data.frame(Education = c("Low", "Medium", "High"), Freq = nrow(dfimputed) * c(0.429,0.304, 0.267))  #less than college, some college, completed college or more

race.dist <- data.frame(Ethnicity = c("White","Black", "Indigenous", "Other Person of Colour"),
                       Freq = nrow(dfimputed) * c(0.685, 0.043, .05, 0.22)) #All categories
 
# racebin.dist <- data.frame(Ethnicity2 = c("White","PoC"),
#                        Freq = nrow(dfimputed) * c(0.685, 0.315)) 
raceBW.dist <- data.frame(Ethnicity2 = c("White","Black", "NA"),
                       Freq = nrow(dfimputed) * c(0.685, 0.043, 0.272))
raceIW.dist <- data.frame(Ethnicity2 = c("White","Indigenous", "NA"),
                       Freq = nrow(dfimputed) * c(0.685, .05, 0.265))
raceoPoCW.dist <- data.frame(Ethnicity2 = c("White","Other Person of Colour", "NA"),
                       Freq = nrow(dfimputed) * c(0.685, 0.22, 0.095))

# compute the weights by putting together the marginal distributions from before with our data
library(survey)
dfimputed.rake <- rake(design = dfimputed.unweighted,
                       sample.margins = list(~Regions, ~Gender, ~Cohort, ~Education,
                                             ~Ethnicity,~BW, ~IW, ~oPOCW),
                       population.margins = list(region.dist, gender.dist, cohort.dist, edu.dist, race.dist, racebin.dist, raceBW.dist, raceIW.dist, raceoPoCW.dist))


# Trim the weights
summary(weights(dfimputed.rake)) # Some of the weights are much too large & too small

dfimputed.rake.trimmed <- trimWeights(dfimputed.rake, lower=0.3, upper=3, strict=TRUE) 
summary(weights(dfimputed.rake.trimmed)) # Now all the weights range between 0.3 and 3, which is more sensible
#str(dfimputed.rake.trimmed)

# Compare the data before and after 
prop.table(svytable(~Ethnicity, dfimputed.unweighted))
prop.table(svytable(~Ethnicity, dfimputed.rake.trimmed))

# Comparing svy mean and total in raked data
svymean(~Ethnicity, dfimputed.rake.trimmed)
svytotal(~Ethnicity, dfimputed.rake.trimmed)

svymean(~Ethnicity, dfimputed.unweighted) # and in unweighted data
svytotal(~Ethnicity, dfimputed.unweighted)

# Creating weights object in original dfimputed dataframe so we can use in our regressions:
dfimputed$WeightsRace <- weights(dfimputed.rake.trimmed)

write_rds(dfimputed, "dfCES2021imputed.R") #Make sure you are adding the right weights if there are different census weights available for 2020 or 2021 surveys.
save(dfimputed.rake.trimmed, file = "Rakeddata.RData")
load("Rakeddata.RData")

#Compare summary statistics for standardized and unstandardized
dfimputed %>% colnames()
dfimputed %<>%rename(`standardized police confidence`=stdPoliceConfidence, `Party ID` = PID,`Confidence politicians`=Confpoliticians, `Confidence feds`=Conffeds, `Confidence province`=Confprov, `Confidence police rescaled`=Confpolrs, `Confidence police`=Confpol)#For ggeffects and imputation

dfimputed$`standardized police confidence` %>% summary()
dfimputed$`Confidence police rescaled` %>% summary()
```

Note: Figures are saved by running cairo_pdf first, then the ggplot code, then dev.off. Run without cairo/dev.off first to make sure everything runs well, that the titles make sense, etc and flag any issue. No need to keep saving wrong plots. When you are sure they are good to save, run cairo first then turn off saving with dev.off.  
```{r Graphing average differences with CI}

dfimputed %>% rename(Variable = `Confidence police rescaled`) %>% #change variable
  drop_na(Variable) %>% 
  group_by(Ethnicity) %>% 
  group_modify(~ mean_se(.x$Variable, mult = 1.96)) %>% 
    #mutate(Var = Variable) %>% 
  ggplot(aes(x = Ethnicity, y = y))+
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25, position = position_dodge(0.9)) +
#  geom_col(position = position_dodge(0.9)) + 
  geom_hline(yintercept = .5, linewidth = 1, color = "red", alpha = .5)+
    geom_point(position = position_dodge(0.9), size=4, shape=21)+
    scale_y_continuous("police confidence 0 to 1 scale")+ #yscale contiuous labels = scales::percent
  scale_x_discrete("Ethnicity")+
  #facet_wrap(Survey~.)+
  theme(axis.text.x = element_text(size = 12))

dfimputed %>% rename(Variable = `standardized police confidence`) %>% #change variable
  drop_na(Variable) %>% 
  group_by(Ethnicity) %>% 
  group_modify(~ mean_se(.x$Variable, mult = 1.96)) %>% 
    #mutate(Var = Variable) %>% 
  ggplot(aes(x = Ethnicity, y = y))+
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25, position = position_dodge(0.9)) +
#  geom_col(position = position_dodge(0.9)) + 
  geom_hline(yintercept = .5, linewidth = 1, color = "red", alpha = .5)+
    geom_point(position = position_dodge(0.9), size=4, shape=21)+
    scale_y_continuous("standardized measure")+ #yscale contiuous labels = scales::percent
  scale_x_discrete("Ethnicity")+
  #facet_wrap(Survey~.)+
  theme(axis.text.x = element_text(size = 12))

dfimputed %>% mutate(diffscore = `Confidence police rescaled`-`standardized police confidence` 
                     #, Category = case_match(Gender, "Women trans and non-binary"~"WTNB", .default = Gender)
                     ) %>% rename(Category = `Confidence police`) %>% #change variable
  drop_na(diffscore) %>% 
  group_by(Category) %>% 
  group_modify(~ mean_se(.x$diffscore, mult = 1.96)) %>% 
  ggplot(aes(x = Category , y = y))+ #If cat = `Confidence police orig` Category %>% fct_rev() or fct_relevel("Black", after = 3L) 
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25, position = position_dodge(0.9)) +
#  geom_col(position = position_dodge(0.9)) + 
    geom_point(position = position_dodge(0.9), size=4, shape=21)+
#    geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_y_continuous("confidence in 0 to 1 scale"#, limits = c(0, NA)
                       )+ #yscale contiuous labels = scales::percent
  scale_x_discrete("Police confidence response")+ #Ethnicity, Police confidence response
  #facet_wrap(Survey~.)+
  theme(axis.text.x = element_text(size = 12))+
  ggtitle("Difference between single-item & \nstandardized confidence in police")
          #, subtitle = "based on substantive response") 
dfimputed$`Party ID` %>% tabyl()

dfimputed %>% rename(Category = Ideology) %>% #change variable
  drop_na(diffscore) %>% 
  group_by(Category) %>% 
  group_modify(~ mean_se(.x$diffscore, mult = 1.96)) %>% 
  ggplot(aes(x = Category, y = y))+ 
    geom_errorbar(aes(ymin = ymin, ymax = ymax), width = 0.25, position = position_dodge(0.9)) +
#  geom_col(position = position_dodge(0.9)) + 
    geom_point(position = position_dodge(0.9), size=4, shape=21)+
    scale_y_continuous("confidence in 0 to 1 scale")+ #yscale contiuous labels = scales::percent
  scale_x_continuous("Ideology, imputed missing" )+ #Ethnicity, Police confidence response
  theme(axis.text.x = element_text(size = 12))+
  ggtitle("Difference between single-item & \nstandardized confidence in police")
          #, subtitle = "based on substantive response")

```

Note: Figures should be saved by running cairo_pdf first, then the ggplot code, then dev.off. Before saving, run without cairo/dev.off first to make sure everything runs well, that the titles make sense, etc and flag any issue. No need to keep saving wrong plots. When you are sure they are good to save, run cairo first then turn off saving with dev.off.  Fig 1 Grouped means differences

```{r Fig 1 CES t-tests}
#load("CES June 2021/RakeddataCES2021.RData") #MAke sure you use raked data
library(survey)
#Switch names back because it doens't play well
dfimputed$POB
dfimputed%<>% rename(PoB= POB, stdPoliceConfidence=`standardized police confidence`,  PID=`Party ID`, Confpoliticians=`Confidence politicians`, Conffeds=`Confidence feds`, Confprov=`Confidence province`, Confpolrs=`Confidence police rescaled`, Confpol=`Confidence police`)

#Standardized version

t1s <- svyttest(stdPoliceConfidence~Ethnicity, design = dfimputed.rake.trimmed)
t1B <- svyttest(stdPoliceConfidence~BW, design = df.weighted)
t1I <- svyttest(stdPoliceConfidence~IW, design = df.weighted)
t1V <- svyttest(stdPoliceConfidence~oPoCW, design = df.weighted)
t2s <- svyttest(stdPoliceConfidence~Gender, design = dfimputed.rake.trimmed) 
t3s <- svyttest(stdPoliceConfidence~Income, design = dfimputed.rake.trimmed)
t4s <- svyttest(stdPoliceConfidence~PoB, design = dfimputed.rake.trimmed)

ts_list <- list(t1s, t2s, t3s, t4s)

ts_list <- list(#t1s, t2s, 
  t1B, t1I, t1V, t4s, t3s)


t_dfs <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in ts_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_dfs <- rbind(t_dfs, vec)
}

# FIGURE S2
t_dfs %<>% mutate(Variable = c(#"Ethnicity", "Gender",  
  "Black\n(ref White)", "Indigenous", "PoC, other", "Immigrant\n(refCanadian-born)", 
  "Below poverty line\n(ref Above)"), 
                  Measure = "Normalized")

names(t_dfs) = c("mean", "lower", "upper", "Variable", "Measure")

# cairo_pdf("Figure 1.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 

t_dfs$Variable %>% unique() %>% sprintf('"%s"', .) %>% toString() %>% cat

cairo_pdf("2024 Revision/Fig1CES.pdf", height = 8.3, width = 8.3)
t_dfs %>% 
  ggplot(aes(x = Variable %>% fct_relevel("Black\n(ref White)", "Indigenous", "PoC, other", "Immigrant\n(refCanadian-born)", "Below poverty line\n(ref Above)"), y = mean)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_color_manual(values = c("black"))+
  theme(axis.text.x = element_text(size = 14, angle = 90), 
        axis.text.y = element_text(size = 14))+ 
  scale_y_continuous(name = "Standardized\nconfidence") +
  scale_x_discrete(name = "CES 2021")
dev.off()
  
  
 t_dfs %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "", y = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police")+ #adjust limits accordingly
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)")) 

#dev.off() #If cairo_pdf was created.

##Recode for rescaled unstandardized

t1u <- svyttest(Confpolrs~Ethnicity, design = dfimputed.rake.trimmed)
t2u <- svyttest(Confpolrs~Gender, design = dfimputed.rake.trimmed)
t3u <- svyttest(Confpolrs~Income, design = dfimputed.rake.trimmed)
t4u <- svyttest(Confpolrs~PoB, design = dfimputed.rake.trimmed)
t5u <- svyttest(Confpolrs~Ideology, design = dfimputed.rake.trimmed) #How does it make a difference in mean with a 10 level scale?
tu_list <- list(t1u, t2u, t3u, t4u)

t_dfu <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in tu_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_dfu <- rbind(t_dfu, vec)
}

t_dfu %<>% mutate(Variable = c("Ethnicity", "Gender", "Income", "PoB"), 
                  Estimate = "Single item")

names(t_dfu) = c("mean", "lower", "upper", "Variable", "Police confidence \nestimate")

# cairo_pdf("Figure 1.pdf", onefile = TRUE) #Uncomment to save, check limits when using different datasets

t_dfu %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "", y = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police")+ #adjust limits accordingly , limits = c(-.17, .03)
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "WTNB (ref Male)"))

bind_rows(t_dfs, t_dfu) %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  `Police confidence \nestimate`)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=4, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
   labs(x = "", y = "") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+
  scale_y_continuous(name = "Confidence in Police, CES 2021")+ #adjust limits accordingly , limits = c(-.17, .03)
  scale_x_discrete(labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "Women, \ntrans and non-binary (ref Male)"))
#dev.off() #If cairo_pdf was created.

#Individual race for #StatscanTtests_flip2 CES2021 version
#Ttest for 4level above, look up BW or IW 
t1s <- svyttest(stdPoliceConfidence~Ethnicity, design = dfimputed.rake.trimmed)
t2s <- svyttest(stdPoliceConfidence~Gender, design = dfimputed.rake.trimmed) 
t3s <- svyttest(stdPoliceConfidence~Income, design = dfimputed.rake.trimmed)
t4s <- svyttest(stdPoliceConfidence~PoB, design = dfimputed.rake.trimmed)
ts_list <- list(t1s, tBWs, tIWs, tOPoCWs, t2s, t3s, t4s)

t_dfsBIN <- data.frame() # Below is a function that going to wrap the diff of means, lower, and upper bounds into a dataframe for plotting
for (item in ts_list) {
  vec <- c(item$estimate, item$conf.int[1], item$conf.int[2]) 
  t_dfsBIN <- rbind(t_dfsBIN, vec)
}

t_dfsBIN %<>% mutate(Variable = c("Ethnicity", "Black\n(ref white)", "Indigenous", "Other person\nof colour", "Gender",  "Income", "PoB"), Estimate = "Normalized")

names(t_dfsBIN) = c("mean", "lower", "upper", "Variable", "Measurement")

# cairo_pdf("Figure 1.pdf", onefile = TRUE) #Uncomment to save once the limits are fixed, then run ggplot then dev.off. The best would be to have a common x scale between all, this will take some tinkering. Write the max upper and lower for each datasset for assessment. 
t_dfsBIN %>% filter(Variable!="Ethnicity") %>% 
  ggplot(aes(x = Variable %>% fct_relevel("Black\n(ref white)", "Indigenous", "Other person\nof colour", "Gender", "Income", "PoB"), y = mean, color =  Measurement)) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=2, shape=21, fill="white", position= position_dodge(0.9)) +
                  # ylim(-0.165, 0.05) +
                  geom_hline(yintercept = 0, size = 1.5, color = "red") +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14, angle = 90), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  # coord_flip()+
  scale_y_continuous(name = "Confidence in Police")+ #adjust limits accordingly
  scale_x_discrete(name = "CES 2021
                   ", labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)","Gender" = "Women, trans\n& non-binary (ref Male)")) 

```

```{r Collated t-tests from Statscan}
SurveyTTests <- 
  tibble::tribble(
      ~Survey,        ~mean,       ~lower,       ~upper,   ~Variable, ~Measurement,
      "GSS34", -0.050103283, -0.061338041, -0.038868526, "Ethnicity",                    "Normalized",
      "GSS34",  0.007134871,  0.003183646,  0.011086097,    "Gender",                    "Normalized",
      "GSS34", -0.018322972, -0.023180755, -0.013465189,    "Income",                    "Normalized",
      "GSS34", -0.036181485,  -0.04059685, -0.031766121,       "PoB",                    "Normalized",
      "GSS34", -0.037288528, -0.065190114, -0.009386941, "Ethnicity",                   "Single item",
      "GSS34",   0.01458693,  0.006847624,  0.022326237,    "Gender",                   "Single item",
      "GSS34", -0.027407206, -0.037487437, -0.017326975,    "Income",                   "Single item",
      "GSS34",  0.008121496, -0.001220839,  0.017463832,       "PoB",                   "Single item",
        "ICC",  -0.08767892, -0.095698046,   -0.0796598, "Ethnicity",                    "Normalized",
        "ICC",   0.01449806,   0.01095581,   0.01804031,    "Gender",                    "Normalized",
        "ICC",  -0.02663856, -0.030708382,  -0.02256873,       "PoB",                    "Normalized",
        "ICC",  -0.25828766, -0.279576757,  -0.23699855, "Ethnicity",                   "Single item",
        "ICC",   0.01603261,  0.007699496,   0.02436573,    "Gender",                   "Single item",
        "ICC",  -0.02419192, -0.034159026,  -0.01422481,       "PoB",                   "Single item",
      "GSS35",  -0.05706769, -0.064361883,   -0.0497735, "Ethnicity",                    "Normalized",
      "GSS35",  0.000112789, -0.002837172,   0.00306275,    "Gender",                    "Normalized",
      "GSS35", -0.016343618, -0.020178528,  -0.01250871,    "Income",                    "Normalized",
      "GSS35", -0.017661441, -0.020433248,  -0.01488963,       "PoB",                    "Normalized",
      "GSS35", -0.077934624,  -0.09775402,  -0.05811523, "Ethnicity",                   "Single item",
      "GSS35",   0.01004145,  0.002728787,   0.01735411,    "Gender",                   "Single item",
      "GSS35", -0.036866677, -0.046608287,  -0.02712507,    "Income",                   "Single item",
      "GSS35",  0.044059124,  0.037044112,   0.05107413,       "PoB",                   "Single item",
    "CES2021", -0.107828709,  -0.13797937, -0.077678052, "Ethnicity",                    "Normalized",
    "CES2021", -0.007429161,  -0.01555199,  0.000693665,    "Gender",                    "Normalized",
    "CES2021", -0.004095469,  -0.01947637,  0.011285434,    "Income",                    "Normalized",
    "CES2021", -0.033148594,  -0.04489896, -0.021398227,       "PoB",                    "Normalized",
    "CES2021", -0.196412484,  -0.26014486, -0.132680111, "Ethnicity",                   "Single item",
    "CES2021", -0.043937145,  -0.06147509,   -0.0263992,    "Gender",                   "Single item",
    "CES2021", -0.044049296,  -0.07298168, -0.015116915,    "Income",                   "Single item",
    "CES2021", -0.024230562,  -0.04430785, -0.004153271,       "PoB",                   "Single item"
    )

view(Surveydiffscores)
view(SurveyTTests)
SurveyTTests$Survey %>% unique

#SurveyTTests$Survey %<>%fct_relevel("GSS34", "ICC","GSS35", "CES2021")
#cairo_pdf("2024 Revision/4Ttests Supplementary2.pdf", onefile = TRUE, height = 8.3, width = 8.3) #Turned coord_flip off
cairo_pdf("2024 Revision/4Ttests Supplementary.pdf", onefile = TRUE, height = 8.3, width = 8.3)
SurveyTTests %>% mutate(Survey = fct_relevel(Survey, "GSS34", "ICC","GSS35", "CES2021")) %>% 
  ggplot(aes(x = factor(Variable), y = mean, color =  Measurement)) +
                    geom_hline(yintercept = 0, size = 1.5, color = "red", alpha = 0.5) +
                  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = .75) +
                  geom_point(mapping=aes(x=Variable, y=mean), size=2, shape=21, fill="white", position= position_dodge(0.9)) +
                  ylim(-0.3, 0.05) +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14),# angle = 90),  #TO TURN
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
   coord_flip()+ 
    facet_wrap("Survey", nrow = 2)+
  scale_y_continuous(name = "Confidence in Police T-Test")+ #adjust limits accordingly , limits = c(-.17, .03)
  scale_x_discrete(name = "", labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Gender" = "Women, trans &\nnon-binary (ref Male)",
                            "Ethnicity" = "Person of colour\n(ref White)"))
dev.off()

SurveyTTests %>%  #filter(Variable  %in% c("other PoC","Indigenous", "Black")) %>% 
  mutate(Survey = fct_relevel(Survey, "GSS34", "ICC","GSS35", "CES2021")) %>% 
  filter(Measurement=="Single item") %>% 
ggplot(aes(x = Variable, y = mean, color =  Survey)) +
  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
             geom_hline(yintercept = 0, size = 1.5, color = "red", alpha = 0.5) +
 geom_point(mapping=aes(x=Variable, y=mean), size=2, shape=21, fill="white", position= position_dodge(0.9)) +
        scale_color_manual(values = c("cornflowerblue","darkred", "darkorchid1", "yellowgreen"))+ 
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+#guides(colour = guide_legend(nrow = 2))+
  scale_y_continuous(name = "Confidence in Police T-Test")+
  scale_x_discrete(name = "", labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Gender" = "Women, trans &\nnon-binary (ref Male)",
                            "Ethnicity" = "Person of colour\n(ref White)"))+
    ggtitle("Single-item")

SurveyTTests %>%  #filter(Variable  %in% c("other PoC","Indigenous", "Black")) %>% 
  mutate(Survey = fct_relevel(Survey, "GSS34", "ICC","GSS35", "CES2021")) %>% 
  filter(Measurement!="Single item") %>% 
ggplot(aes(x = Variable, y = mean, color =  Survey)) +
  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
             geom_hline(yintercept = 0, size = 1.5, color = "red", alpha = 0.5) +
 geom_point(mapping=aes(x=Variable, y=mean), size=2, shape=21, fill="white", position= position_dodge(0.9)) +
        scale_color_manual(values = c("cornflowerblue","darkred", "darkorchid1", "yellowgreen"))+ 
  theme+
   theme(axis.text.x = element_text(size = 14), 
         axis.text.y = element_text(size = 14))+ #if we're adding x instead of legends
  coord_flip()+#guides(colour = guide_legend(nrow = 2))+
  scale_y_continuous(name = "Confidence in Police T-Test")+
  scale_x_discrete(name = "", labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Gender" = "Women, trans &\nnon-binary (ref Male)",
                            "Ethnicity" = "Person of colour\n(ref White)"))+
    ggtitle("Standardized")

SurveyTTests %>% 
  ggplot(aes(x = factor(Variable), y = mean, color = Measurement)) +
  geom_hline(yintercept = 0, size = 1.5, color = "tomato3", alpha = 5) +
  geom_errorbar(aes(ymax = upper, ymin = lower), position = position_dodge(0.9), width = 0.5, size = 1) +
  geom_point(mapping=aes(x=Variable, y=mean), size=2, shape=21, fill="white", position= position_dodge(0.9)) +
  ylim(-0.3, 0.055) +
    scale_color_manual(values = c("grey35", "blue"))+
  theme+
   theme(axis.text.x = element_text(size = 14, angle = 90), #if we're adding x instead of legends
         axis.text.y = element_text(size = 14))+ 
#  coord_flip()+
  facet_wrap(Survey %>% fct_relevel("GSS34","ICC", "GSS35", "CES 2021")~., nrow = 1)+
  scale_y_continuous(name = "Confidence in Police T-Test", limits = c(-0.3, 0.055))+ #adjust limits accordingly
  scale_x_discrete(name = "", labels=c("PoB" = "Immigrant\n(ref Canadian-born)",
                            "Income" = "Below poverty line\n(ref above the poverty)",
                            "Ethnicity" = "Person of colour\n(ref White)", 
                            "Gender" = "Women, trans \n& non-binary (ref Male)"))
dev.off()
 
```

```{r Differences in SI and std measures}

Surveydiffscores <- tibble::tribble(
    ~Survey,                ~Variable,                        ~Level, ~`average estimate difference`,    ~ymin,    ~ymax,
    "GSS34",              "Ethnicity",                       "White",        0.204,    0.201,    0.207,
    "GSS34",              "Ethnicity",                       "Black",        0.222,    0.201,    0.243,
    "GSS34",              "Ethnicity",                  "Indigenous",        0.122,    0.112,    0.132,
    "GSS34",              "Ethnicity",      "Other Person of Colour",        0.232,    0.224,     0.24,
    "GSS34",                 "Income",               "Above poverty",        0.201,    0.198,    0.204,
    "GSS34",                 "Income",          "Below poverty line",         0.19,    0.184,    0.197,
    "GSS34",                 "Gender",                        "Male",        0.192,    0.188,    0.196,
    "GSS34",                 "Gender", "Women, trans\n& non-binary",        0.204,    0.201,    0.208,
    "GSS34",         "Place of birth",                      "Canada",        0.191,    0.187,    0.194,
    "GSS34",         "Place of birth",            "Other birthplace",        0.235,    0.229,    0.242,
    "GSS34", "Confidence\nresponse",         "1, a lot of confidence",        0.369,    0.367,    0.372,
    "GSS34", "Confidence\nresponse",                  "0.66",        0.127,    0.125,    0.129,
    "GSS34", "Confidence\nresponse",         "0.33",       -0.149,   -0.154,   -0.143,
    "GSS34", "Confidence\nresponse",     "0 confidence",       -0.414,   -0.425,   -0.402,
      "ICC",              "Ethnicity",                       "White",        0.114,    0.112,    0.116,
      "ICC",              "Ethnicity",                       "Black",      -0.0509,  -0.0647,   -0.037,
      "ICC",              "Ethnicity",                  "Indigenous",     -0.00416,  -0.0165,  0.00815,
      "ICC",              "Ethnicity",      "Other Person of Colour",        0.082,   0.0755,   0.0884,
      "ICC",                 "Income",               "Above poverty",           NA,       NA,       NA,
      "ICC",                 "Income",          "Below poverty line",           NA,       NA,       NA,
      "ICC",                 "Gender",                        "Male",       0.0992,   0.0952,    0.103,
      "ICC",                 "Gender", "Women, trans\n& non-binary",        0.102,   0.0994,    0.104,
      "ICC",         "Place of birth",                      "Canada",        0.101,   0.0987,    0.103,
      "ICC",         "Place of birth",            "Other birthplace",        0.101,   0.0962,    0.107,
      "ICC", "Confidence\nresponse",       "1, a lot of confidence",         0.36,    0.358,    0.362,
      "ICC", "Confidence\nresponse",                        "0.75",        0.193,    0.192,    0.194,
      "ICC", "Confidence\nresponse",                         "0.5",        0.026,   0.0246,   0.0274,
      "ICC", "Confidence\nresponse",                        "0.25",       -0.133,   -0.135,   -0.131,
      "ICC", "Confidence\nresponse",     "0 confidence",       -0.314,   -0.317,    -0.31,
    "GSS35",              "Ethnicity",                       "White",         0.17,    0.167,    0.173,
    "GSS35",              "Ethnicity",                       "Black",        0.146,    0.132,     0.16,
    "GSS35",              "Ethnicity",                  "Indigenous",       0.0984,   0.0825,    0.114,
    "GSS35",              "Ethnicity",      "Other Person of Colour",          0.2,    0.197,    0.204,
    "GSS35",                 "Income",               "Above poverty",        0.184,    0.181,    0.186,
    "GSS35",                 "Income",          "Below poverty line",         0.17,    0.164,    0.175,
    "GSS35",                 "Gender",                        "Male",        0.179,    0.175,    0.182,
    "GSS35",                 "Gender", "Women, trans\n& non-binary",        0.183,     0.18,    0.186,
    "GSS35",         "Place of birth",                      "Canada",        0.153,     0.15,    0.156,
    "GSS35",         "Place of birth",            "Other birthplace",        0.213,    0.209,    0.216,
    "GSS35", "Confidence\nresponse",         "1, a lot of confidence",      0.38,    0.378,    0.381,
    "GSS35", "Confidence\nresponse",                        "0.75",         0.19,    0.189,    0.192,
    "GSS35", "Confidence\nresponse",                         "0.5",       0.0088,  0.00709,   0.0105,
    "GSS35", "Confidence\nresponse",                        "0.25",        -0.16,   -0.164,   -0.157,
    "GSS35", "Confidence\nresponse",     "0 confidence",       -0.348,   -0.354,   -0.342,
  "CES2021",              "Ethnicity",                       "White",       0.0624,   0.0573,   0.0675,
  "CES2021",              "Ethnicity",                       "Black",       -0.035,  -0.0772,  0.00733,
  "CES2021",              "Ethnicity",                  "Indigenous",      -0.0214,  -0.0673,   0.0245,
  "CES2021",              "Ethnicity",      "Other Person of Colour",       0.0266,   0.0156,   0.0377,
  "CES2021",                 "Income",               "Above poverty",       0.0564,   0.0517,   0.0612,
  "CES2021",                 "Income",          "Below poverty line",       0.0143, -0.00377,   0.0324,
  "CES2021",                 "Gender",                        "Male",       0.0678,    0.061,   0.0745,
  "CES2021",                 "Gender", "Women, trans\n& non-binary",       0.0403,    0.034,   0.0465,
  "CES2021",         "Place of birth",                      "Canada",        0.054,    0.049,   0.0591,
  "CES2021",         "Place of birth",            "Other birthplace",       0.0478,   0.0361,   0.0594,
  "CES2021", "Confidence\nresponse",   "1, a lot of confidence",    0.284,    0.278,    0.291,
  "CES2021", "Confidence\nresponse",           "0.66",       0.0881,   0.0848,   0.0914,
  "CES2021", "Confidence\nresponse",         "0.33",       -0.137,   -0.142,   -0.133,
  "CES2021", "Confidence\nresponse",     "0 confidence",       -0.357,   -0.367,   -0.347,
  "CES2021",               "Ideology",                "0, left ideology",      -0.0411,  -0.0744, -0.00784,
  "CES2021",               "Ideology",                           "1",      -0.0103,  -0.0388,   0.0183,
  "CES2021",               "Ideology",                           "2",       0.0368,   0.0201,   0.0534,
  "CES2021",               "Ideology",                           "3",       0.0635,   0.0503,   0.0768,
  "CES2021",               "Ideology",                           "4",       0.0827,   0.0702,   0.0952,
  "CES2021",               "Ideology",                           "5",       0.0562,    0.046,   0.0664,
  "CES2021",               "Ideology",                           "6",       0.0634,   0.0516,   0.0752,
  "CES2021",               "Ideology",                           "7",       0.0502,   0.0383,   0.0621,
  "CES2021",               "Ideology",                           "8",       0.0619,   0.0475,   0.0763,
  "CES2021",               "Ideology",                           "9",       0.0409,   0.0116,   0.0702,
  "CES2021",               "Ideology",            "10, right ideology",       0.0245,  -0.0053,   0.0543)

Surveydiffscores %>% filter(!Variable %in% c("Confidence\nresponse", "Ideology"), Survey!= "CES2021") %>% summary(.$`average estimate difference`)
cairo_pdf("2024 Revision/Diffscores 3surveys.pdf", onefile = TRUE, height = 8.3, width = 17)
Surveydiffscores %>% mutate(Survey = fct_relevel(Survey, "GSS34", "ICC","GSS35", "CES2021"), 
                            Level = fct_relevel(Level, "White", "Black", "Indigenous", "Other Person of Colour",  "Above poverty", "Below poverty line", "Canada", "Other birthplace","Male", "Women, trans & non-binary", "0 confidence", "0.25", "0.33", "0.5", "0.66", "0.75",                                                 "1, a lot of confidence")) %>%
     # filter(Variable != "Ideology") %>%
   filter(Variable != "Ideology", Survey!="CES2021", Variable!= "Confidence\nresponse") %>%
   ggplot(aes(x = Level, y = `average estimate difference`, color =  Variable %>% fct_relevel("Ethnicity", "Income", "Gender", "Place of birth", "Confidence\nresponse", "Ideology"))) +
                    geom_hline(yintercept = 0, size = 1.5, color = "red", alpha = 0.5) +
                  #geom_errorbar(aes(ymax = ymax, ymin = ymin), position = position_dodge(0.9), width = 0.5, size = .75) +
  geom_point(mapping=aes(x=Level, y=`average estimate difference`), size=2, shape=21, fill="white", position= position_dodge(0.9)) +
  #scale_color_manual(values = c("grey35", "blue"))+
  theme+
  theme(axis.title.y = element_text(size = 16, colour = "black"),
        legend.title = element_text(size = 16, colour = "black"),
        legend.text = element_text(size = 14, colour = "black"),
        axis.text.x = element_text(size = 12, angle = 90), 
        axis.text.y = element_text(size = 12), 
        strip.text = element_text(size = 14))+ #if we're adding x instead of legends
  #coord_flip()+
  facet_wrap("Survey", nrow = 1)+
  guides(color = guide_legend(title = "Variables"))+
  scale_y_continuous(name = "Single - standardized\ndifference")+ 
  scale_x_discrete(name = "")
dev.off()
```


#Protest analysis
```{Protest analysis}
dfimputed$Protesting %>% summary()
dfimputed$Protesting %<>% recode("None" = 0, "Yes" = 1)
library(rms)
library(nnet)

dfimputed %<>% rename(POB=`Place of birth`,stdPoliceConfidence=`standardized police confidence`, PID=`Party ID`, Confpoliticians=`Confidence politicians`, Conffeds=`Confidence feds`, Confprov=`Confidence province`, Confpolrs=`Confidence police rescaled`, Confpol=`Confidence police`)


dfimputed %<>% rename(`Place of birth`=POB,`standardized police confidence`=stdPoliceConfidence, `Party ID`=PID, `Confidence politicians`=Confpoliticians, `Confidence feds`=Conffeds, `Confidence province`=Confprov, `Confidence police rescaled`=Confpolrs, `Confidence police`=Confpol)

ideologySIRaked <- lm(Confpolrs~Ethnicity + PID + Ideology, weights = WeightsRace, data = dfimputed) 
ideologyStdRaked <- lm(stdPoliceConfidence~Ethnicity +PID+Ideology, weights = WeightsRace, data = dfimputed) 
ideologySI <- lm(Confpolrs~Ethnicity +PID+Ideology, weights = Weights, data = dfimputed) 
ideologyStd <- lm(stdPoliceConfidence~Ethnicity +PID+Ideology, weights = Weights, data = dfimputed) 
stargazer(ideologyStdRaked, ideologySIRaked, type = "latex", 
          column.labels = c("Standardized", "Single-item"), 
          covariate.labels=c("Black\n(ref White)", "Indigenous", "Other Person of colour", 
                             "Bloc Quebecois\n(ref Liberal)", "Conservative", "Green", 
                             "NDP", "Other/No party ID", "Ideology 0-10"), 
          notes = "Raked weights")

stdmod.protest <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID + Ideology, x=TRUE, y=TRUE, weights = Weights, data = dfimputed, family=binomial)
stdmod.protestRaked <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID + Ideology, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)

simod.protest <- Glm(Protesting == 1 ~ Confpolrs + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID + Ideology, x=TRUE, y=TRUE, weights = Weights, data = dfimputed, family=binomial)
simod.protestRaked <- Glm(Protesting == 1 ~ Confpolrs + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID + Ideology, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)

stargazer(stdmod.protestRaked, simod.protestRaked, type= "text",
          column.labels = c("Standardized", "Single-item"),
          covariate.labels=c("Confidence in police", "Confidence in police", 
            "Black (ref White)", "Indigenous", "Other Person of colour", "Below poverty line (ref Above)", "Women, trans non-binary", "Born outside Canada (ref Canadian-born)", "Gen X (ref Younger)", "Boomer", "Silent", "West (ref Ontario)", "Atlantic", "British Columbia", "North", "Quebec", "Medium Ed (ref Low)", "High Ed", 
                             "Bloc Quebecois\n(ref Liberal)", "Conservative", "Green", 
                             "NDP", "Other/No party ID", "Ideology 0-10"))

simod.protest3 <- Glm(Protesting == 1 ~ Confpolrs + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)
stargazer(stdmod.protest2, simod.protest2, type= "text")
```

```{Fig 2 CES and StatsCan outputs}
#GSS35 std SE estimate = 0.205
(0.205*1.96)
.04-(.04*0.4018)
.04+(.04*0.4018)
.03-(.03*0.4018)
.03+(.03*0.4018)
.01-(.01*0.4018)
.01+(.01*0.4018)

#GSS35 SI SE estimate = 0.078 
# (0.078*1.96) 
# .17*-0.15288 # .17*0.15288
# .11+(.11*-0.15288) # .11+(.11*0.15288)
# .07+(.07*-0.15288) # .07+(.07*0.15288)
# .04+(.04*-0.15288) # .04+(.04*0.15288) 
# .02+(.02*-0.15288) # .02+(.02*0.15288) 

protestmodels <- tibble::tribble(
                      ~Survey,   ~Measurement, ~`Confidence level`, ~Predicted, ~ymin, ~ymax,
                      "GSS35", "Standardized",                 0,       0.39,  0.34982,  0.45018,
                      "GSS35", "Standardized",              0.21,        0.2,  0.17,  0.23,
                      "GSS35", "Standardized",              0.32,       0.14,  0.12,  0.15,
                      "GSS35", "Standardized",              0.42,       0.09,  0.08,  0.11,
                      "GSS35", "Standardized",              0.52,       0.06,  0.05,  0.07,
                      "GSS35", "Standardized",              0.61,       0.04,  0.03,  0.056,
                      "GSS35", "Standardized",               0.7,       0.03,  0.017946,  0.042054,
                      "GSS35", "Standardized",                 1,       0.01,  0.005982,  0.014018,
                      "GSS35",  "Single-item",                 0,       0.17,  0.15288,  0.18712,
                      "GSS35",  "Single-item",              0.25,       0.11,  0.0931832,  0.12288,
                      "GSS35",  "Single-item",               0.5,       0.07,  0.0592984,  0.0807016,
                      "GSS35",  "Single-item",              0.75,       0.04,   0.0338848, 0.0461152,
                      "GSS35",  "Single-item",                 1,       0.02,  0.0169424,  0.0230576,
                      "CES 2021", "Standardized",                 0,       0.31,  0.23,  0.42,
                      "CES 2021", "Standardized",              0.23,       0.25,  0.18,  0.33,
                      "CES 2021", "Standardized",              0.33,       0.22,  0.16,  0.29,
                      "CES 2021", "Standardized",              0.44,        0.2,  0.14,  0.26,
                      "CES 2021", "Standardized",              0.52,       0.18,  0.13,  0.24,
                      "CES 2021", "Standardized",              0.62,       0.16,  0.11,  0.21,
                      "CES 2021", "Standardized",              0.72,       0.14,   0.1,  0.19,
                      "CES 2021", "Standardized",                 1,        0.1,  0.06,  0.14,
                      "CES 2021",  "Single-item",                 0,       0.23,  0.17,  0.31,
                      "CES 2021",  "Single-item",              0.33,        0.2,  0.14,  0.26,
                      "CES 2021",  "Single-item",              0.67,       0.16,  0.12,  0.22,
                      "CES 2021",  "Single-item",                 1,       0.14,   0.1,  0.19)

cairo_pdf("2024 Revision/protestmodelsGSS35CES2021_line.pdf", onefile = TRUE, height = 8.3, width = 11)
protestmodels %>% 
 ggplot(., aes(`Confidence level`, Predicted, color = Measurement, fill = Measurement))+
  geom_line()+
  geom_ribbon(aes(ymin = ymin, ymax = ymax), alpha = .25)+
   scale_color_manual(values = c("grey35", "blue"))+
   scale_fill_manual(values = c("grey35", "blue"))+
  theme+
  theme(axis.title.y = element_text(size = 16, colour = "black"),
        legend.title = element_text(size = 16, colour = "black"),
        legend.text = element_text(size = 14, colour = "black"),
        axis.text.x = element_text(size = 12, angle = 90), 
        axis.text.y = element_text(size = 12), 
        strip.text = element_text(size = 14))+
  facet_wrap(Survey%>% fct_relevel("GSS35", "CES 2021")~.)+
  xlab("Confidence in Police")+ 
  ylab("Probability of protesting")+
  ggtitle("")
dev.off()

#Alt GSS35 rendering with line geom for clarity
cairo_pdf("2024 Revision/protestmodelsGSS35DVSI_line.pdf", onefile = TRUE, height = 8.3, width = 11)
protestmodels %>% filter(Survey=="GSS35" & Measurement=="Single-item") %>% 
 ggplot(., aes(`Confidence level`, Predicted, color = Measurement, fill = Measurement))+
  geom_line()+
  geom_ribbon(aes(ymin = ymin, ymax = ymax), alpha = .25)+
   scale_color_manual(values = c("grey35", "blue"))+
   scale_fill_manual(values = c("grey35", "blue"))+
  theme+
  theme(axis.title.y = element_text(size = 16, colour = "black"),
        legend.title = element_text(size = 16, colour = "black"),
        legend.text = element_text(size = 14, colour = "black"),
        axis.text.x = element_text(size = 12, angle = 90), 
        axis.text.y = element_text(size = 12), 
        strip.text = element_text(size = 14))+
    xlab("Confidence in Police")+ 
  ylab("Probability of protesting")+
  ggtitle("")
dev.off()

#cairo_pdf("Pr protest.pdf", onefile = TRUE)
library(ggeffects)
stdmod.protest%>%
  ggeffect(terms = c("stdPoliceConfidence[all]")) %>% 
  ggplot(., aes(x, predicted))+
  geom_smooth(se = TRUE, method = "loess", color = "black")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
  xlab("std Confidence in Police")+
  ylab("Probability of protesting, no PID")+
  ggtitle("")

stdmod.protest2%>%  
  ggeffect(terms = c("stdPoliceConfidence[all]")) %>% 
  ggplot(., aes(x, predicted, fill = group, color = group))+
  geom_smooth(se = TRUE, method = "loess")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25)+
  theme+
  xlab("std Confidence in Police")+
  ylab("Probability of protesting, PID included")+
  ggtitle("")

stdmod.protest2%>%  #Check if this works
  ggeffect(terms = c("stdPoliceConfidence[all]", "Ethnicity")) %>% 
  ggplot(., aes(x, predicted, fill = group, color = group))+
  geom_smooth(se = TRUE, method = "loess")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25)+
  theme+
  xlab("std Confidence in Police")+
  ylab("Probability of protesting, PID included")+
  ggtitle("")

simod.protest%>%
  ggeffect(terms = c("Confpolrs[all]")) %>% 
  ggplot(., aes(x, predicted))+
  geom_smooth(se = TRUE, method = "loess", color = "black")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
xlab("single-item\nConfidence in Police")+
  ylab("Probability of protesting, no PID")+
  ggtitle("")

simod.protest2%>%  #Check if this works
  ggeffect(terms = c("Confpolrs[all]")) %>% 
  ggplot(., aes(x, predicted))+
  geom_smooth(se = TRUE, method = "loess")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25, fill = "grey25")+
  theme+
xlab("single-item\nConfidence in Police")+
  ylab("Probability of protesting, PID included")+
  ggtitle("")

simod.protest2%>%  #Check if this works
  ggeffect(terms = c("Confpolrs[all]", "Ethnicity")) %>% 
  ggplot(., aes(x, predicted, fill = group, color = group))+
  geom_smooth(se = TRUE, method = "loess")+
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = .25)+
  theme+
  xlab("single-item\nConfidence in Police")+
  ylab("Probability of protesting, PID included")+
  ggtitle("")
```

## Switching between renames
```{r Quick Renames block for ggeffect and imputation}
df %<>% rename(POB=`Place of birth`,stdPoliceConfidence=`standardized police confidence`, PID=`Party ID`, Confpoliticians=`Confidence politicians`, Conffeds=`Confidence feds`, Confprov=`Confidence province`, Confpolrs=`Confidence police rescaled`, Confpol=`Confidence police`)

df %<>% rename(`Place of birth`=POB,`standardized police confidence`=stdPoliceConfidence, `Party ID`=PID, `Confidence politicians`=Confpoliticians, `Confidence feds`=Conffeds, `Confidence province`=Confprov, `Confidence police rescaled`=Confpolrs, `Confidence police`=Confpol)

```

# Analysis with finalized cleaned weighted and imputed data
```{r Summaries, include=FALSE}
dfimputed <- read_rds("dfCES2021imputed.R")

getMode <- function(df) {
  ux <- na.omit(unique(df))
  ux[which.max(tabulate(match(df, ux)))]
}


summaryT <- dfimputed %>% select(`Confidence police rescaled`, Protesting, Ethnicity, Income, 
                                 `Place of birth`, Gender, Cohort, Education, Regions)
Mode <- apply(summaryT%>% select(where(is.factor)), 2, getMode)

summaryT %>% select(-`Confidence police rescaled`) %>% map(~(tabyl(.) %>% adorn_totals("row") %>% adorn_pct_formatting()))
dfimputed$`Confidence police rescaled` %>% summary


library(gt)
x <- c("Confidence police rescaled", "Protesting", "Ethnicity", "Income", "Place of birth", "Gender", "Cohort", "Education", "Regions")

summaryT %>% 
  summarise_all(.funs = list(
    `Mean or mode` = function(x)ifelse(is.numeric(x), sprintf("%.3f", mean(x, na.rm = TRUE)), as.character(getMode(x))), 
    SD = function(x)ifelse(is.numeric(x), round(sd(x, na.rm = TRUE), 2), NA_real_), 
    `Min or first level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", min(x, na.rm = TRUE)), levels(x)[1]), 
    `Max or last level` = function(x)ifelse(is.numeric(x), sprintf("%.0f", max(x, na.rm = TRUE)), levels(x)[length(levels(x))]), 
    n = function(x)sum(!is.na(x))
  )) %>% 
  pivot_longer(everything(),
        names_to = c("Variable", ".value"),
        names_pattern = "(.+)_(.+)") %>%
  mutate(Variable = factor(Variable, levels = x)) %>%
  arrange(Variable) %>% gt(rownames_to_stub = TRUE) %>%
  tab_header(title = "Descriptive statistics", subtitle = "CES2021") %>% gtsave("descriptives.tex")
  #tab_CES2021_note("SD NA for categorical variables") 
```

```{r orig Effects regressions}
library(rms)
library(nnet)
dfimputed2021 <- read_rds("CES June 2021/dfCES2021imputed.R")
dfimputed$Protesting %<>% recode("None" = 0, "Yes" = 1)
mod.protest <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)
mod.protest2 <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)

dfimputed %<>% mutate(Agesq=Age^2)
mod.protestAge <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)
mod.protestAge2 <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)

mod.votechoice <- multinom(VoteFor ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education+PID, weights = WeightsRace, Hess = TRUE, data = dfimputed)
mod.votechoice2 <- multinom(VoteFor ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education+PID, weights = WeightsRace, Hess = TRUE, data = dfimputed)

mod.pid <- multinom(PID ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education, weights = WeightsRace, Hess = TRUE,data = dfimputed)
mod.pid2 <- multinom(PID ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education, weights = WeightsRace, Hess = TRUE,data = dfimputed)

dfimputed$VotedB %<>% recode("No" = 0, "Yes" = 1)
mod.voted <- Glm(VotedB == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)
mod.voted2 <- Glm(VotedB == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Cohort + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)

mod.votedAge <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)
mod.votedAge2 <- Glm(Protesting == 1 ~ stdPoliceConfidence + Ethnicity2 + Income + Gender + POB+ Age+ Agesq + Regions + Education + PID, x=TRUE, y=TRUE, weights = WeightsRace, data = dfimputed, family=binomial)

save(mod.protest, mod.protest2, mod.pid, mod.pid2, mod.voted, mod.voted2, mod.votechoice, mod.votechoice2,
     mod.protestAge, mod.protestAge2,mod.votedAge, mod.votedAge2,
     file = "EffectsModelsCES2021.RData")

#for stargazer to run, load the libraries and save without rms:: or nnet:: but avoid otherwise because rms will mask necessary packages so once you have saved the models you could restart the session and load(EffectsModelsCES2021.RData) without library(rms) or (nnet) loaded

library(stargazer)
stargazer(mod.votechoice, mod.votechoice2, type = "html", out = "votechoiceCES2021.html")

stargazer(mod.voted, mod.voted2, mod.protest, mod.protest2, title = "Logit models: Effect on protest and voting behavior",
          align=TRUE, 
          no.space=TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), df = FALSE, type = "latex", 
 covariate.labels=c("Police confidence (std)", "Police confidence (std)", 
    "Person of colour",
    #Please add the Ethnicity order labels, ref should be "White". Double check by making the stargazer without running covariate.labels() argument first.
 "Below poverty line",
          "Male", "Born outside Canada", "Gen X", "Boomer", "Older", #"Age",  "$Age^2$",
 "West", "BC", "Atlantic", "Quebec",
          "Medium Education", "High Education", "PID: Other/None", "Bloc", "Conservative", "Green", "NDP"))

stargazer(mod.pid, mod.pid2,
          title = "PID effect (reference party: Liberal)",
          align=TRUE, 
          no.space=TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), df = FALSE, type = "html", out="Effects_PID.html", 
 covariate.labels=c("Police confidence", "PoC (ref White)", 
                        #Please add the Ethnicity order labels, ref should be "White". Double check by making the stargazer without running covariate.labels() argument first.
                    "Above poverty line",
          "Male", "Born outside Canada", "Gen X (ref GenZ+Millen)", "Boomer", "Older",
          "West (ref Ontario)", "BC", "Atlantic", "Quebec",
          "Medium Education", "High Education"))

stargazer(mod.votechoice, 
          title = "Party vote choice effect (reference party: Liberal)",
          align=TRUE, 
          no.space=TRUE,
          star.cutoffs = c(0.05, 0.01, 0.001), df = FALSE, type = "latex", #out="Effects_VoteFor.html",
 covariate.labels=c("Police confidence", "Person of colour", "Below poverty line", "Male", "Born outside Canada", "Gen X", "Boomer", "Older", 
                             "West (ref Ontario)", "BC", "Atlantic", "Quebec",
          "Medium", "High","Other/none", "Bloc", "Conservative", "Green", "NDP"))

```
